# -------------------------------------------------------------------
# Purpose: Creates Figure B11
# Author:  Max Posch, 25/07/2025
# Usage:   Source this script to generate both panels of the figure
# -------------------------------------------------------------------
# Check that required paths exist
stopifnot(dir.exists(pdataanalysis))
stopifnot(dir.exists(pdataconfanalysis))
stopifnot(dir.exists(poutputappendix))


## Load data
load(file.path(pdataanalysis, "countyLevel19001940.RData"))

# Left panel: Patents per 1,000 people
temp <- copy(countyLevel19001940) %>% drop_na(iv_lo_entropy_namelast_mp_adjp_fe_immig_ws, iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws, sum_patents_pc_1900_f_w)
temp[, iv_lo_entropy_namelast_mp_adjp_fe_immig_ws := resid(feols(iv_lo_entropy_namelast_mp_adjp_fe_immig_ws ~ iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws | gisjoin_1900 + statefip^year + gisjoin_1900[year], data = temp))]
temp[, sum_patents_pc_1900_f_w := resid(feols(sum_patents_pc_1900_f_w ~ iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws | gisjoin_1900 + statefip^year + gisjoin_1900[year], data = temp))]
est1 <- binsreg(sum_patents_pc_1900_f_w, iv_lo_entropy_namelast_mp_adjp_fe_immig_ws, data = temp)
result <- est1$data.plot$`Group Full Sample`
p <- ggplot() +
    labs(
        x = "Predicted surname diversity",
        y = NULL,
        subtitle = "Patents per 1,000 people",
    ) +
    geom_point(data = result$data.dots, aes(x = x, y = fit), size = 2, alpha = 1) +
    theme_minimal()
plotname <- file.path(poutputappendix, "figureB11left.pdf")
ggsave(plotname, p, width = 4.6, height = 4.6, units = "in")

cat("Figure saved to:", plotname, "\n")


# Right panel: Breakthrough patents per 1,000 people
temp <- copy(countyLevel19001940) %>% drop_na(iv_lo_entropy_namelast_mp_adjp_fe_immig_ws, iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws, sum_break_p80_rrfsim05_pc_1900_f_w)
temp[, iv_lo_entropy_namelast_mp_adjp_fe_immig_ws := resid(feols(iv_lo_entropy_namelast_mp_adjp_fe_immig_ws ~ iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws | gisjoin_1900 + statefip^year + gisjoin_1900[year], data = temp))]
temp[, sum_break_p80_rrfsim05_pc_1900_f_w := resid(feols(sum_break_p80_rrfsim05_pc_1900_f_w ~ iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws | gisjoin_1900 + statefip^year + gisjoin_1900[year], data = temp))]
est1 <- binsreg(sum_break_p80_rrfsim05_pc_1900_f_w, iv_lo_entropy_namelast_mp_adjp_fe_immig_ws, data = temp)
result <- est1$data.plot$`Group Full Sample`
p <- ggplot() +
    labs(
        x = "Predicted surname diversity",
        y = NULL,
        subtitle = "Breakthrough patents per 1,000 people",
    ) +
    geom_point(data = result$data.dots, aes(x = x, y = fit), size = 2, alpha = 1) +
    theme_minimal()
plotname <- file.path(poutputappendix, "figureB11right.pdf")
ggsave(plotname, p, width = 4.6, height = 4.6, units = "in")

cat("Figure saved to:", plotname, "\n")
